/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/toolchain.h>
#include <zephyr/arch/cpu.h>
#include <offsets_short.h>
#include <zephyr/syscall.h>
#include <zephyr/kernel/mm.h>

#ifdef CONFIG_X86_KPTI
/* Copy interrupt return stack context to the trampoline stack, switch back
 * to the user page table, and only then 'iretq'. We jump to this instead
 * of executing 'iretq' directly if KPTI is turned on. This must be invoked
 * with interrupts locked.
 *
 * Stack layout on entry is expected to be what 'iretq' expects, which is as
 * follows (byte offsets from RSP):
 *
 * 32 SS
 * 24 RSP
 * 16 RFLAGS
 * 8  CS
 * 0  RIP
 *
 * All general purpose registers are preserved: RDI and RAX are used as
 * scratch registers but are restored before the 'iretq'.
 */
.global z_x86_trampoline_to_user
z_x86_trampoline_to_user:
	/* Stash RDI, need a free register. From here on every offset in the
	 * layout above is biased by 8 bytes.
	 */
	pushq	%rdi

	/* Store old stack pointer in RDI and switch to the trampoline stack,
	 * whose address is read from the per-CPU TSS 'ist2' field via GS
	 */
	movq	%rsp, %rdi
	movq	%gs:__x86_tss64_t_ist2_OFFSET, %rsp

	/* Copy context, highest slot first, so that the trampoline stack ends
	 * up with the same 'iretq' frame layout as the old stack
	 */
	pushq	40(%rdi)	/* SS */
	pushq	32(%rdi)	/* RSP */
	pushq	24(%rdi)	/* RFLAGS */
	pushq	16(%rdi)	/* CS */
	pushq   8(%rdi)		/* RIP */

	/* RDI points at the slot holding the stashed RDI value. Exchanging
	 * restores the original RDI; as a side effect the slot is overwritten
	 * with the old stack pointer value.
	 */
	xchgq	%rdi, (%rdi)

	/* Switch to thread's page table: TSS -> cpu struct -> current thread
	 * -> page table pointer -> CR3. RAX is borrowed for the pointer chase
	 * and parked on the trampoline stack meanwhile.
	 */
	pushq	%rax
	movq	%gs:__x86_tss64_t_cpu_OFFSET, %rax
	movq	___cpu_t_current_OFFSET(%rax), %rax
	movq	_thread_offset_to_ptables(%rax), %rax
	movq	%rax, %cr3
	popq	%rax
	movq	$0, -8(%rsp)	/* Delete stashed RAX data */

	/* Trampoline stack should have nothing sensitive in it at this point:
	 * only the 'iretq' frame remains. Swap GS back and return.
	 */
	swapgs
	iretq
#endif /* CONFIG_X86_KPTI */


/* Landing site for 'syscall' instruction
 *
 * Call id is in RAX
 * Arguments are in RDI, RSI, RDX, R10, R8, R9
 * Return address stored by CPU in RCX
 * User RFLAGS stored by CPU in R11
 * Current RFLAGS has been masked with ~X86_FMASK_MSR
 *
 * On return to user mode RAX holds the handler's return value; RDI, RSI,
 * RDX, R8, R9, R10, RCX, R11 and the FXSAVE state are restored to the values
 * they had on entry (but see the note at _bad_syscall regarding RDI).
 */
.global z_x86_syscall_entry_stub
z_x86_syscall_entry_stub:
	swapgs

	/* Save original stack pointer from user mode in memory, at the
	 * moment we have no free registers or stack to save it to. This
	 * eventually gets put on the stack before we re-enable interrupts
	 * as this is a per-cpu and not per-thread area.
	 */
	movq	%rsp, %gs:__x86_tss64_t_usp_OFFSET

#ifdef CONFIG_X86_KPTI
	/* We need to switch to the trampoline stack so that we can
	 * switch to the kernel's page table
	 */
	movq	%gs:__x86_tss64_t_ist2_OFFSET, %rsp

	/* Load kernel's page table. RAX carries the call id, so park it on
	 * the trampoline stack while it is borrowed.
	 */
	pushq	%rax

	/* NOTE: Presumes phys=virt */
	movq	$K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
	movq	%rax, %cr3
	popq	%rax
	movq	$0, -8(%rsp)	/* Delete stashed RAX data */
#endif /* CONFIG_X86_KPTI */

	/* Switch to the privilege mode stack pointer stored in
	 * x86_tss64.psp
	 */
	movq	%gs:__x86_tss64_t_psp_OFFSET, %rsp

	/* We're now on the privilege mode stack; push the old user stack
	 * pointer onto it
	 */
	pushq	%gs:__x86_tss64_t_usp_OFFSET
#ifdef CONFIG_X86_KPTI
	/* Scrub the per-cpu copy now that it lives on the thread's stack */
	movq	$0, %gs:__x86_tss64_t_usp_OFFSET
#endif

	sti			/* re-enable interrupts */

	/* call_id is in RAX. bounds-check it, must be less than
	 * K_SYSCALL_LIMIT. The comparison is unsigned ('jae'), so values
	 * with the top bit set are rejected as well.
	 */
	cmp	$K_SYSCALL_LIMIT, %rax
	jae	_bad_syscall

	/* Also the re-entry point for _bad_syscall, which arrives here with
	 * RAX = K_SYSCALL_BAD and RDI = the rejected call id.
	 */
_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif

	/* Remaining registers not involved in the syscall operation are
	 * RBX, RBP, R12-R15, plus floating point / SIMD registers.
	 *
	 * We save caller-saved registers so we can restore to original values
	 * when we call 'sysretq' at the end.
	 *
	 * Resulting frame, from the top of the privilege stack downwards:
	 * user RSP, RDI, FXSAVE area, RSI, RDX, R8, R9, R10, R11, RCX, ssf.
	 * The restore sequence after the call pops in exactly the reverse
	 * order.
	 *
	 * NOTE(review): 'fxsave' needs a 16-byte aligned operand, which here
	 * depends on the alignment of the value loaded from x86_tss64.psp --
	 * confirm where that value is set up.
	 */
	pushq	%rdi
	subq	$X86_FXSAVE_SIZE, %rsp
	fxsave	(%rsp)
	pushq	%rsi
	pushq	%rdx
	pushq	%r8
	pushq	%r9
	pushq	%r10
	pushq	%r11	/* RFLAGS */
	pushq	%rcx	/* Return address stored by 'syscall' */
	/* 'pushq %rsp' stores the value RSP had before the push, i.e. the
	 * address of the saved RCX slot just above. This is the stack-passed
	 * argument following the six register arguments.
	 */
	pushq	%rsp	/* SSF parameter */

	/* All other args are in the right registers, except arg4 which
	 * we had to put in r10 instead of RCX
	 */
	movq	%r10, %rcx

	/* from the call ID in RAX, load R10 with the actual function pointer
	 * to call by looking it up in the system call dispatch table.
	 * R11 (already saved above) is zeroed so it can act as a null base
	 * register for the scaled-index addressing mode; each table entry
	 * is 8 bytes.
	 */
	xorq	%r11, %r11
	movq	_k_syscall_table(%r11, %rax, 8), %r10

	/* Run the marshal function, which is some entry in _k_syscall_table */
	call	*%r10

	/* RAX now contains the return value
	 *
	 * Callee-saved registers are un-touched from original values per C
	 * calling convention, but sensitive data may lurk in caller-saved regs
	 * RDI, RSI, RDX, R8, R9, R10, XMM* after we have serviced the system
	 * call.  We saved them earlier, restore their original values when
	 * the syscall was made. This also preserves these registers if they
	 * were not used as arguments.
	 *
	 * We also can't have RCX and R11 clobbered as we need the original
	 * values to successfully 'sysretq'.
	 */
	addq	$8, %rsp	/* Discard ssf */
	popq	%rcx	/* Restore return address for 'sysretq' */
	popq	%r11	/* Restore RFLAGS for 'sysretq' */
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rdx
	popq	%rsi
	fxrstor	(%rsp)
	addq	$X86_FXSAVE_SIZE, %rsp
	popq	%rdi

	/* Only the saved user stack pointer is left on the privilege stack */

#ifdef CONFIG_X86_KPTI
	/* Lock IRQs as we are using per-cpu memory areas and the
	 * trampoline stack
	 */
	cli

	/* Stash user stack pointer and switch to trampoline stack */
	popq	%gs:__x86_tss64_t_usp_OFFSET
	movq	%gs:__x86_tss64_t_ist2_OFFSET, %rsp

	/* Switch to thread's page table: TSS -> cpu struct -> current thread
	 * -> page table pointer -> CR3. RAX holds the syscall return value,
	 * so it is parked on the trampoline stack while borrowed.
	 */
	pushq	%rax
	movq	%gs:__x86_tss64_t_cpu_OFFSET, %rax
	movq	___cpu_t_current_OFFSET(%rax), %rax
	movq	_thread_offset_to_ptables(%rax), %rax
	movq	%rax, %cr3
	popq	%rax
	movq	$0, -8(%rsp)	/* Delete stashed RAX data */

	/* Restore saved user stack pointer and scrub the per-cpu copy */
	movq	%gs:__x86_tss64_t_usp_OFFSET, %rsp
	movq	$0, %gs:__x86_tss64_t_usp_OFFSET
#else
	/* Restore user stack pointer */
	popq	%rsp

	/* Return to user mode, locking interrupts as the normal interrupt
	 * handling path will get very confused if it occurs between
	 * 'swapgs' and 'sysretq'
	 *
	 * NOTE(review): interrupts are still enabled for the one instruction
	 * boundary between 'popq %rsp' above and this 'cli', i.e. while
	 * running in kernel mode with the user RSP loaded -- confirm the
	 * interrupt entry path tolerates that.
	 */
	cli
#endif /* CONFIG_X86_KPTI */

	swapgs
	sysretq

_bad_syscall:
	/* RAX had a bogus syscall value in it, replace with the bad syscall
	 * handler's ID, and put the bad ID as its first argument.
	 *
	 * NOTE(review): this overwrites the caller's RDI before _id_ok saves
	 * it, so on this path the RDI "restored" before 'sysretq' is the bad
	 * call id rather than the user's original value.
	 *
	 * TODO: On this and all other arches, simply immediately return
	 * with -ENOSYS, once all syscalls have a return value
	 */
	movq	%rax, %rdi
	movq	$K_SYSCALL_BAD, %rax
	jmp	_id_ok

/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 *                              ^ RDI          ^ RSI           ^ RDX
 *
 * Bounded string length of a possibly-invalid user pointer.
 *
 * In:    RDI = s, RSI = maxsize, RDX = err_arg (must be writable)
 * Out:   RAX = number of bytes before the first NUL, capped at maxsize
 *        *err_arg = 0 on success
 *        *err_arg = -1 if execution reaches z_x86_user_string_nlen_fixup
 *        without passing through strlen_done (presumably redirected there
 *        by the page fault handler -- that code is not in this file). The
 *        return value should not be relied upon in that case.
 * Clobb: RAX, R8, flags
 *
 * At most 'maxsize' bytes are examined: the bound is tested before each
 * load, so s[maxsize] is never dereferenced and an unmapped byte directly
 * after a valid maxsize-byte buffer does not cause a spurious error.
 */
.global arch_user_string_nlen
arch_user_string_nlen:
	/* Initial error value, strlen_done adjusts this if we succeed */
	movl	$-1, %r8d

	/* use RAX as our length count (this function's return value) */
	xorl	%eax, %eax		/* 32-bit write also zeroes upper half */

strlen_loop:
	/* Max length reached? Check before touching memory so that we never
	 * read the byte at index maxsize.
	 */
	cmpq	%rsi, %rax
	je	strlen_done

	/* This code might page fault. The fault_start/fault_end labels
	 * bracket the only instruction that dereferences 's'.
	 */
.global z_x86_user_string_nlen_fault_start
z_x86_user_string_nlen_fault_start:
	cmpb	$0x0, (%rdi, %rax, 1)	/* *(RDI + RAX) == 0? Could fault. */

.global z_x86_user_string_nlen_fault_end
z_x86_user_string_nlen_fault_end:
	je	strlen_done
	incq	%rax			/* RAX++ and loop again */
	jmp	strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	xorl	%r8d, %r8d

.global z_x86_user_string_nlen_fixup
z_x86_user_string_nlen_fixup:
	/* Write error value to 32-bit integer err pointer parameter */
	movl	%r8d, (%rdx)
	retq

/*
 * First instructions executed in user mode after the 'sysretq' issued by
 * z_x86_userspace_enter.
 *
 * 'sysretq' takes its return address from RCX, so that register could not
 * carry p3 across the transition; p3 travels in R10 instead and is moved
 * back into the register the C calling convention uses for the fourth
 * argument before entering z_thread_entry.
 *
 * No code follows the call: z_thread_entry is not expected to return here.
 */
z_x86_userspace_landing_site:
	movq	%r10, %rcx	/* 4th argument (p3) back where C expects it */
	callq	z_thread_entry

/* FUNC_NORETURN void z_x86_userspace_enter(
 *		k_thread_entry_t user_entry,	<- RDI
 *		void *p1, void *p2, void *p3,	<- RSI, RDX, RCX
 *		uintptr_t stack_end,		<- R8
 *		uintptr_t stack_start)		<- R9
 *
 * A one-way trip to userspace.
 *
 * Drops to user mode via 'sysretq', landing in
 * z_x86_userspace_landing_site with:
 *   RDI, RSI, RDX = user_entry, p1, p2 (unchanged)
 *   R10           = p3 (the landing site moves it to RCX)
 *   RSP           = stack_end - 8, pointing at a zero "return address"
 *   RAX, RBX, RBP, R12-R15 = 0
 */
.global z_x86_userspace_enter
z_x86_userspace_enter:
	/* RCX is sysret return address, pass along p3 in r10,
	 * z_x86_userspace_landing_site will fix this up
	 */
	movq	%rcx, %r10

	/* switch to privilege mode stack so we can erase thread stack buffer,
	 * the buffer is the page immediately before the thread stack
	 */
	movq	%r9, %rsp

	/* Preserve the caller-saved registers that still carry our arguments
	 * (the C callee is free to clobber them) and go back into C code to
	 * erase the stack buffer and set US bit in page tables for it.
	 *
	 * NOTE(review): five pushes leave RSP 8 bytes off a 16-byte boundary
	 * at the call if stack_start itself is 16-byte aligned -- confirm
	 * this is acceptable for the callee.
	 */
	pushq	%rdx
	pushq	%rsi
	pushq	%rdi
	pushq	%r8
	pushq	%r10
	callq	z_x86_current_stack_perms
	popq	%r10
	popq	%r8
	popq	%rdi
	popq	%rsi
	popq	%rdx

	/* Reset to the beginning of the user stack */
	movq	%r8, %rsp

	/* set sysret entry point */
	movq	$z_x86_userspace_landing_site, %rcx

	/* Copy RFLAGS into r11, required by sysret. This is sampled before
	 * the 'cli' below, so user mode gets the interrupt flag state that
	 * was in effect here, not the locked state.
	 */
	pushfq
	movq	(%rsp), %r11
	movq	$0, (%rsp)	/* Now a debugger-friendly return address */

	/* cleanse other registers */
	xorq	%rbx, %rbx
	xorq	%rbp, %rbp
	xorq	%r12, %r12
	xorq	%r13, %r13
	xorq	%r14, %r14
	xorq	%r15, %r15

	/* Lock interrupts for the final page table / GS / privilege switch */
	cli

#ifdef CONFIG_X86_KPTI
	/* Switch to thread's page table. We have free registers so no need
	 * to involve the trampoline stack.
	 */
	movq	%gs:__x86_tss64_t_cpu_OFFSET, %rax
	movq	___cpu_t_current_OFFSET(%rax), %rax
	movq	_thread_offset_to_ptables(%rax), %rax
	movq	%rax, %cr3
#endif

	/* Cleanse RAX as well: it is not an argument to the landing site and
	 * would otherwise expose to user mode either the page table address
	 * just loaded into CR3 (KPTI) or whatever the C call above left
	 * behind. Clobbering flags here is harmless, 'sysretq' reloads
	 * RFLAGS from R11.
	 */
	xorq	%rax, %rax

	swapgs
	sysretq
